********************************************************************************
*PROJECT: Legislature Integration and Bipartisanship: A Natural Experiment in Iceland								   
*PURPOSE: Combine MP bio & seating data and add randomizations for RI	   
********************************************************************************

// Prep Iceland MP data
// MP-session dataset with MP and neighbour details, seating and counterfactual seating (for RI)

// ---- Session settings ----
clear all
set more off
set maxvar 32767
set mem 1g

// ---- Folder locations (all relative to the global $path_pch) ----
local seatingfolder    "$path_pch\Data\raw\Seating"
local seatingintfolder "$path_pch\Data\intermediate\Seating"
local MPfolder         "$path_pch\Data\raw\MP"
local MPintfolder      "$path_pch\Data\intermediate\MP"
local cosponfolder     "$path_pch\Data\raw\Co-Sponsorship"

// ---- Number of re-randomizations drawn for randomization inference (RI) ----
local runsRI 1000

// ---- Temporary files used across sections ----
tempfile seat_init
tempfile birthday
tempfile expenses

// ---- Fix the random-number seed so the RI draws are repeatable ----
// (seed source noted by the authors: a 20-dollar bill, June 3, 2017)
set seed 91858389

**************************
/* Bring seating data, initially assigned */
**************************

// Read one seating CSV per session (115-149) and stash each in its own tempfile.
//   115-120: digitised journal files
//   121-149: scraped initial-assignment files
// Sessions up to 123 use a different seat-numbering convention (see email from Ingvi):
// seats numbered 14 or above shift by one, and seats numbered 30 or above by a further two.
forvalues sess = 115/149 {
	if `sess' <= 120 {
		import delimited using "`seatingfolder'\althingi journals\digitised\seating_journal_`sess'.csv", clear encoding("utf-8")
	}
	else {
		import delimited using "`seatingfolder'\initialassign\seating_scraped_`sess'.csv", clear encoding("utf-8")
	}
	if `sess' <= 123 {
		replace seat = seat+(seat>=14)+2*(seat>=30)
	}
	tempfile ses`sess'
	save `ses`sess''
}

// Stack the sessions in chronological order, starting from 115
use `ses115', clear
forvalues sess = 116/149 {
	append using `ses`sess''
}

// Remove placeholder rows: empty seats ("varamanns") and the speaker's seat ("forseta").
// The speaker is added back by hand further down.
drop if mp_name == "varamanns" | mp_name == "forseta"

// Standardise abbreviated middle names by appending a period.
// "Á" and "Kr" are listed explicitly; every other middle name with length() of 1 gets the same treatment.
split mp_name
replace mp_name2 = mp_name2 + "." if inlist(mp_name2,"Á","Kr") | length(mp_name2) == 1

// Rebuild the name from its first two words, plus the third word when there is one
replace mp_name = mp_name1 + " " + mp_name2 + cond(!mi(mp_name3), " " + mp_name3, "")
drop mp_name?

// Add one row per session (115-149) for the speaker of that session.
// The list below is ordered by session: entry 1 is session 115, entry 35 is session 149.
local speakers `""Salome Þorkelsdóttir" "Salome Þorkelsdóttir" "Salome Þorkelsdóttir" "Salome Þorkelsdóttir""'
local speakers `"`speakers' "Ólafur G. Einarsson" "Ólafur G. Einarsson" "Ólafur G. Einarsson" "Ólafur G. Einarsson" "Ólafur G. Einarsson""'
local speakers `"`speakers' "Halldór Blöndal" "Halldór Blöndal" "Halldór Blöndal" "Halldór Blöndal" "Halldór Blöndal" "Halldór Blöndal" "Halldór Blöndal" "Halldór Blöndal""'
local speakers `"`speakers' "Sólveig Pétursdóttir" "Sólveig Pétursdóttir" "Sturla Böðvarsson" "Sturla Böðvarsson" "Sturla Böðvarsson""'
local speakers `"`speakers' "Ásta R. Jóhannesdóttir" "Ásta R. Jóhannesdóttir" "Ásta R. Jóhannesdóttir" "Ásta R. Jóhannesdóttir" "Ásta R. Jóhannesdóttir""'
local speakers `"`speakers' "Einar K. Guðfinnsson" "Einar K. Guðfinnsson" "Einar K. Guðfinnsson" "Einar K. Guðfinnsson""'
local speakers `"`speakers' "Steingrímur J. Sigfússon" "Unnur Brá Konráðsdóttir" "Steingrímur J. Sigfússon" "Steingrímur J. Sigfússon""'

// Create all 35 new rows at once, then fill them in one by one
local firstnew = _N + 1
set obs `=_N + 35'
forvalues idx = 1/35 {
	local row = `firstnew' + `idx' - 1
	local who : word `idx' of `speakers'
	replace mp_name = "`who'" in `row'
	replace sessionnum = `idx' + 114 in `row'
	replace seat = 31 in `row' // the speaker's seat (one exception, handled next)
}

// Exception: in session 129 the speaker sat at seat 30 instead of 31
replace seat = 30 if sessionnum == 129 & seat == 31

// Remove exact duplicate rows and confirm one row per session-seat
sort sessionnum seat
duplicates drop
isid sessionnum seat

// Keep the numeric seat as seatno_init and store the seat *code* in seat_init
rename seat seatno_init
gen seat_init = ""

// Seat codes in seat-number order: the k-th code belongs to seat number k (57 codes in total)
local seatcodes "S LF6 LF5 LF4 LF3 LF2 LF1 UF1 UF2 UF3 UF4 UF5 UF6 LC9 LC8 LC7 LC6"
local seatcodes "`seatcodes' LC5 LC4 LC3 LC2 LC1 UC1 UC2 UC3 UC4 UC5 UC6 UC7 UC8 UC9 LB13"
local seatcodes "`seatcodes' LB12 LB11 LB10 LB9 LB8 LB7 LB6 LB5 LB4 LB3 LB2 LB1 UB1 UB2"
local seatcodes "`seatcodes' UB3 UB4 UB5 UB6 UB7 UB8 UB9 UB10 UB11 UB12 UB13"

// Walk through the codes, keeping track of the seat number each one corresponds to
local pos = 0
foreach code of local seatcodes {
	local ++pos
	replace seat_init = "`code'" if seatno_init == `pos'
}

// Rows whose seat number has no code are ministers: drop them
drop if seat_init == ""

// name changes to make merge perfect (with MP_Bio.dta)
// Each line rewrites one spelling found in the seating files to the spelling on the right-hand side of
// the assignment. Several of the source spellings look like digitisation misreadings (e.g. "Ami" for
// "Árni", a leading "?") -- presumably from the digitised journals; verify against the raw CSVs.
replace mp_name = "Birkir Jón Jónsson" if mp_name=="Birkir J. Jónsson" 
replace mp_name = "Ólína Kjerúlf Þorvarðardóttir" if mp_name=="Ólína Þorvarðardóttir"
replace mp_name = "Bjarkey Olsen Gunnarsdóttir" if mp_name=="Bjarkey Gunnarsdóttir"
replace mp_name = "Ólína Kjerúlf Þorvarðardóttir" if mp_name=="Guðbjartur Hannesson" & sessionnum == 145 // Guðbjartur died before the random draw happened.
replace mp_name = "Ísólfur Gylfi Pálmason" if mp_name == "?sólfur Gylfi Pálmason"
replace mp_name = "Árni M. Mathiesen" if mp_name == "Ami M. Mathiesen"
replace mp_name = "Jóhann Ársælsson" if mp_name == "Jóhann Arsælsson"
replace mp_name = "Logi Einarsson" if mp_name == "Logi Már Einarsson" & sessionnum == 146
replace mp_name = "Ólafur Ragnar Grímsson" if mp_name == "Olafur Ragnar Grímsson"
replace mp_name = "Ólafur Þ. Þórðarson" if mp_name == "Olafur Þ. Þórðarson"
replace mp_name = "Ragnar Arnalds" if mp_name == "Ragnar Amalds"
replace mp_name = "Árni Johnsen" if mp_name == "Ámi Johnsen"
replace mp_name = "Árni R. Árnason" if mp_name == "Árni R. Ámason"
// The name rebuild above keeps at most three words, so this four-word name is restored by hand
replace mp_name = "Þórdís Kolbrún R. Gylfadóttir" if mp_name == "Þórdís Kolbrún R"

// merge with MP_bio prepared from Iceland_MP_bio_prep.do
// assert(2 3) stops the do-file if any seating row has no match in MP_Bio.dta;
// keep(1 3) then discards the MP_Bio rows that have no seat in this data.
*drop seatno_init
ren mp_name MP_name
ren sessionnum session_id
merge 1:1 MP_name session_id using "`MPintfolder'/MP_Bio.dta", nogen keep(1 3) assert(2 3)
save `seat_init'

**************************
// manually code designated seats for speakers, party leaders, disabled
**************************

// random records how each seat was assigned. Every row starts at 1 and is overwritten case by case:
//   1 = default (no special designation coded)
//   2 = seat designated for a disabled MP
//   3 = speaker's seat
//   4 = caucus leader (coded in the "leaders" sections that follow)
// The replace statements run in order, so a later statement overrides an earlier one for the same row.
gen random = 1

// disabled
*replace random = 2 if session_id == "125" & inlist(seat_init,"UF2") //Steingrímur J. Sigfússon: only after injuring the feet almost at the end of the seat, not affected initial assignment.
replace random = 2 if inlist(session_id,130,131) & inlist(seat_init,"UB8") // Gunnar Örlygsson: Sigurlín Margrét Sigurðardóttir was Gunnar's deputy, and she was deaf (Sigurlín is no longer Gunnar's deputy from session 132 on, so he was randomly assigned from then)
replace random = 2 if seat_init == "LB13" & inrange(session_id,129,141) // Helgi Hjörvar
replace random = 2 if session_id == 144 & inlist(seat_init,"LC9") // Steinunn Þóra Árnadóttir
replace random = 2 if session_id == 145 & inlist(seat_init,"LB13","LC9") // Björt Ólafsdóttir, Steinunn Þóra Árnadóttir
replace random = 2 if inlist(session_id,148,149) & inlist(seat_init,"LF2","LC9") // Inga Sæland, Guðmundur Ingi Kristinsson
// There is an explicit record of disabled seating in the speech data starting with session 130.
// We also looked at people who sat in a historically disabled seat two years in a row: didn't find anyone suspicious.

// speakers
replace random = 3 if seat_init == "UC9" & session_id != 129
replace random = 3 if seat_init == "UC8" & session_id == 129 // exceptional case where the speaker sat at UC8 (#30) instead of UC9 (#31).

****
/*
// Diagnose 1: potentially disabled people
gen disabled_seat = inlist(seat_init,"LC9","LF2","LB13","UB8") // historically used by any disabled MP
sort MP_id session_id
br if (disabled_seat == 1 & disabled_seat[_n+1] == 1 & MP_id == MP_id[_n+1] & random == 1) | (disabled_seat == 1 & disabled_seat[_n-1] == 1 & MP_id == MP_id[_n-1] & random == 1)
	// Steinunn Þóra Árnadóttir sat at UB8 in session 146, but UB8 has historically been the seat used for deaf MPs, which is different from her usual designated seat (LC9). So this is not counted as non-random.
	// Ásmundur Einar Daðason sat at UB8 twice in a row (138-139). I think this is random chance (nothing in his photos suggests a disability, and he sits all over the chamber except in these two years).
	// Pétur H. Blöndal sat at LF2 in 119-120, but there is no reason to believe that he was disabled. He sat all over the chamber in other years.
*/
****

// leaders "Formaður þingflokks" (caucus chair), mostly from the explicit speech record (a formally recognized separate randomization); for the non-explicit ones, we checked the MP's bio page to see whether they were caucus leader
// From session 131 on the leaders are identified by seat code; the set of seats grows in later sessions.
replace random = 4 if inrange(session_id,131,141) & inlist(seat_init,"UB1","UC1","UF1","LB1","LC1") // We are 99.9% certain about this: coming from speech text
replace random = 4 if inlist(session_id,142,143,144,145) & inlist(seat_init,"UB1","UC1","UF1","LB1","LC1","LF1") // We are 99.9% certain about this: coming from speech text
replace random = 4 if inlist(session_id,146,147) & inlist(seat_init,"UB1","UC1","UF1","UF6","LB1","LC1","LF1") // We are 99.9% certain about this: coming from speech text
replace random = 4 if inlist(session_id,148,149) & inlist(seat_init,"UB1","UC1","UF1","UF6","LB1","LC1","LF1","LF6") // We are 99.9% certain about this: coming from speech text

// leaders "Formaður þingflokks", from Table 3 of Thorstein's article, also taking into account the caucus chairmanship record (see explanation for each case)
// Before session 131 the leaders are identified by MP_id and session rather than by seat code.
replace random = 4 if MP_id == 162 & inlist(session_id,115,118,119,121,122)
	// https://www.althingi.is/altext/cv/is/?nfaerslunr=162: he was caucus leader 115-122 (91-98; sjál)
	// Table 2 says he got an aisle seat in sessions 117-120.
	// 118,119: easy case because he indeed appears to have an aisle seat to begin with.
	// 117: no aisle seat on the record. Probably switched afterwards (Sigidur, who was recorded as the one who switched with Geir, had an aisle seat)
	// 120: neither person is in an aisle seat. So not counting this as non-random. Maybe there was a mistake?
	// 115,121,122: although not recorded in the table, clearly sitting in the aisle seat. Marked as non-random.
replace random = 4 if MP_id == 501 & inrange(session_id,123,128)
	// https://www.althingi.is/altext/cv/is/?nfaerslunr=501: she was caucus leader 123-128 (98-03; sjál)
	// Table 2 says she exchanged her seat in 126, which is confirmed from the initial assignment.
	// She also sat in an aisle seat in all other years when she was caucus leader. So all are marked as non-random.
replace random = 4 if MP_id == 491 & inrange(session_id,124,126)
	// https://www.althingi.is/altext/cv/is/?nfaerslunr=491
	// Formaður þingflokks Alþýðuflokksins 1993–1994 (117) og 1995–1996 (119/120), 
	// formaður þingflokks jafnaðarmanna 1996–1999 (121-123)
	// og formaður þingflokks Samfylkingarinnar 1999–2001 (124-126)
	// Table 2 says she exchanged her seat in 126, which is confirmed from the initial assignment.
	// She also sat in an aisle seat in sessions 124,125. Marked as non-random.
* replace nothing.
	// Ingibjörg Sólrún Gísladóttir (264):
	// mentioned in Table 3 for 117, sitting at UF2, which is a similar seat to the replacement seat
	// (Jona's UF5)... Also no record of being a caucus chair. Pass.

// leaders "Formaður þingflokks" who sat at the aisle twice in a row, verified as caucus leader from their bio pages
replace random = 4 if MP_id == 98 & inrange(session_id,127,130)
	// Formaður þingflokks Samfylkingarinnar 2001–2004 (127-130)
replace random = 4 if MP_id == 124 & inrange(session_id,130,130)
	// Formaður þingflokks sjálfstæðismanna 2003–2005 (130-131); 131 already taken care of above
replace random = 4 if MP_id == 176 & inrange(session_id,130,130)
	// Formaður þingflokks Frjálslynda flokksins 1999–2004 (124-130), aisle seat only at 130; (also at 131, which is counted above, maybe typo in bio?)
// Formaður þingflokks sjálfstæðismanna 2009–2010 og 2012–2013.

****
/*
// Diagnosis 2: potentially party leaders sitting in the aisle
gen aisle_seat = inlist(seat_init,"UF1","UC1","UB1","LF1","LC1","LB1")
sort MP_id session_id
br if (aisle_seat == 1 & aisle_seat[_n+1] == 1 & MP_id == MP_id[_n+1] & random == 1) | (aisle_seat == 1 & aisle_seat[_n-1] == 1 & MP_id == MP_id[_n-1] & random == 1)
	// Guðjón Guðmundsson 175: probably fluke. Sjal 120,121 leaders already accounted for.
	// Kolbrún Halldórsdóttir 383: probably fluke. No reason to believe that she was caucus chair. 
	// Sverrir Hermannsson 573: probably fluke. No reason to believe that he was caucus chair. 
	// Illugi Gunnarsson 687: fluke. explicit list given this year (136).
	// Margrét Tryggvadóttir 723: fluke. explicit list given this year (138).
*/
****

// All other aisle sitters, also verified as caucus leaders from their bio pages.
// Each statement marks one MP (by MP_id) as a caucus leader (random = 4) in the listed sessions only;
// the comment under each statement quotes the caucus-chair record that the authors checked.
replace random = 4 if MP_id == 488 & inlist(session_id,118)
	// 488: Formaður þingflokks Alþýðubandalagsins 1971–1975, 1979–1980, 1983–1987 og 1992–1995 (116-118).
replace random = 4 if MP_id == 563 & inlist(session_id,119,121,122)
	// 563: Formaður þingflokks Alþýðubandalagsins og óháðra 1995–1999 (119-123).
	// LF6 is not an aisle seat, but it is the other end of the desk in front.
replace random = 4 if MP_id == 386 & inlist(session_id,127)
	// 386: Formaður þingflokks Framsóknarflokksins 1999–2003(124-128). Formaður þingflokks Frjálslynda flokksins 2007–2008, varaformaður 2008–2009.
replace random = 4 if MP_id == 199 & inlist(session_id,120)
	// 199: Formaður þingflokks Samtaka um kvennalista 1995–1996 og 1997–1998 (120,122).
replace random = 4 if MP_id == 390 & inlist(session_id,123)
	// 390: Formaður þingflokks Samtaka um kvennalista 1984–1985, 1996–1997 og 1998–1999. (121,123)

/* list of people where aisle-sitting and chairman period do not overlap except for already accounted for.
*sort session_id
*br if inlist(seat_init,"UB1","UC1","UF1","LB1","LC1","LF1") & random == 1 & session_id <= 130
// 347: Formaður þingflokks Samtaka um kvennalista 1993–1995 (117-118).
// 501: Formaður þingflokks sjálfstæðismanna 1998–2003 (123-128). (123-128 already taken care of)
// 429: Formaður þingflokks Alþýðubandalagsins 1988–1992 (115). Formaður þingflokks Samfylkingarinnar 2004–2006.
// 124: Formaður þingflokks sjálfstæðismanna 2003–2005 (129-131) (130-131 already taken care of)
// 630: (123-136)Formaður þingflokks óháðra 1998–1999 (123). Formaður þingflokks Vinstrihreyfingarinnar - græns framboðs 1999–2009.
// 98: Formaður þingflokks Samfylkingarinnar 2001–2004. (127-130)
// 561: Formaður þingflokks Þjóðvaka 1995–1996 (120).
// 251: Formaður þingflokks Framsóknarflokksins 2003–2007. (130-)
*/

// Summary of how I find caucus leaders non-randomly sitting in the aisle.
// 1. For sessions 131-149, there was a formal procedure
	// 1.1.  most sessions explicitly list the leaders who participated in separate lottery.
	// 1.2. For sessions 131-149 that do not explicitly give list, all of them sit at the same five spots, so I assumed that they are caucus leaders.
// 2. For sessions before 131, there was no formal procedure, and switching between party members (leader and ordinary member) happened.
	// 2.1. I first look at Table 2 of Thorstein paper, and verify if the leader indeed sat at the aisle. I also look at these leaders carefully and see whether they sat at the aisle during their tenure.
	// 2.2. I then look at all the aisle seaters and verify whether they were a leader at the period that they sat at the aisle.
// Note
	// For pre-131, this method is only legit if we're willing to assume that when switching, party members do not take into account who the neighbor is, and only think about the seat location.
	// For speech record used for this section, see "Special_seating_arrangement_from_speech_data.docx"

// Stratum indicator: 1 when the seat carries no special designation (random == 1), 0 otherwise
gen strata = (random == 1)

**************************
// Add coalition information (sessions 115-149)
**************************

// coalition = 1 for MPs whose party belongs to the government-forming coalition in that session, 0 otherwise.
// Written as one 0/1 expression; each line below is one governing coalition (year it took office, parties).
gen coalition = ///
	  (inrange(session_id,115,118) & inlist(party_id,4,1))     /// 1991. Independence Party and Social Democratic Party
	| (inrange(session_id,119,123) & inlist(party_id,4,3))     /// 1995. Independence Party and Progressive Party
	| (inrange(session_id,124,128) & inlist(party_id,4,3))     /// 1999. Independence Party and Progressive Party
	| (inrange(session_id,129,133) & inlist(party_id,4,3))     /// 2003. Independence Party and Progressive Party
	| (inrange(session_id,134,136) & inlist(party_id,4,9))     /// 2007. Independence Party and The Social Democratic Alliance
	| (inrange(session_id,137,141) & inlist(party_id,9,12))    /// 2009. The Social Democratic Alliance and the Left-Green Movement
	| (inrange(session_id,142,145) & inlist(party_id,4,3))     /// 2013. Independence Party and Progressive Party
	| (inrange(session_id,146,147) & inlist(party_id,4,16,15)) /// Jan 2017. Independence Party, Reform Party, Bright Future
	| (inrange(session_id,148,149) & inlist(party_id,4,12,3))  // Nov 2017. Independence Party, Left-Green Movement, Progressive Party

// 146-147 https://en.wikipedia.org/wiki/Cabinet_of_Bjarni_Benediktsson_(2017)
	*Independence Party (IP)
	*Reform Party (RP)
	*Bright Future (BF)

// 148-149 https://en.wikipedia.org/wiki/Cabinet_of_Katr%C3%ADn_Jakobsd%C3%B3ttir
	*Independence Party (D)
	*Left-Green Movement (V)
	*Progressive Party (Iceland) (B)

**************************
/* Prep for Randomization Inference */
**************************

// Step 1: build the pool of seats that take part in the re-draw.
// Only rows with random==1 and a non-missing seat number enter; within each session the seats are
// numbered 1..K (pool_rank) in ascending seat-number order.
tempfile seatpool
preserve
	drop if !(random==1 & seatno_init!=.)
	keep session_id seatno_init
	sort session_id seatno_init, stable
	by session_id: gen pool_rank = _n
	rename seatno_init seatnoRI
	save `seatpool'
restore

// Step 2: for each RI run, put the eligible MPs of every session in a fresh random order and hand
// them the pool seats in that order. Rows that are not eligible get a missing sort key, so they sort
// after the eligible rows, find no pool seat, and keep their actual seat number.
forvalues draw = 1/`runsRI' {
	gen shuffle_key = runiform() if random==1 & seatno_init!=.
	sort session_id shuffle_key, stable
	by session_id: gen pool_rank = _n
	merge 1:1 session_id pool_rank using `seatpool'
	assert _merge==3 if random==1 & seatno_init!=. // every eligible MP must receive a seat
	drop _merge shuffle_key pool_rank
	replace seatnoRI = seatno_init if seatno_init!=. & seatnoRI==.
	rename seatnoRI seatnoRI`draw'
}


// Translate every re-drawn seat number into its seat code.
// The k-th code in the list belongs to seat number k (57 codes in total).
local codes "S LF6 LF5 LF4 LF3 LF2 LF1 UF1 UF2 UF3 UF4 UF5 UF6 LC9 LC8 LC7 LC6"
local codes "`codes' LC5 LC4 LC3 LC2 LC1 UC1 UC2 UC3 UC4 UC5 UC6 UC7 UC8 UC9 LB13"
local codes "`codes' LB12 LB11 LB10 LB9 LB8 LB7 LB6 LB5 LB4 LB3 LB2 LB1 UB1 UB2"
local codes "`codes' UB3 UB4 UB5 UB6 UB7 UB8 UB9 UB10 UB11 UB12 UB13"

forvalues run = 1/`runsRI' {
	gen seat_initRI`run' = ""
	local pos = 0
	foreach code of local codes {
		local ++pos
		replace seat_initRI`run' = "`code'" if seatnoRI`run' == `pos'
	}
	// the numeric version is no longer needed once the code is stored
	drop seatnoRI`run'
}

// The actual seating becomes "run 0" so that it passes through the same loops as the RI runs
ren seat_init seat_initRI0
//ren strata strata_initRI0

// Working files are declared ONCE here and overwritten in place inside the loop.
// (Stata only erases tempfiles when the do-file ends, so declaring a new tempfile on every iteration
// would leave thousands of files -- many of them copies of the ever-wider main dataset -- on disk.)
tempfile temp tempU tempL
save `temp'


// find neighbors for RI
// For every run j, attach the details of the MPs sitting next to each MP on the same bench:
//   U_*RIj = occupant of the seat whose number is one higher (same two-letter prefix)
//   L_*RIj = occupant of the seat whose number is one lower
local needvar "MP_id gender party_id coalition strata" // neighbor variables that we will use
forv j=0/`runsRI' {

	// left and right neighbours
	forv k = 1/2 {
		local d: word `k' of U L //upper and lower number (delta = 1)
		local dn = 2*`k'-3 // -1 for U, +1 for L

		// Load only the columns needed for the lookup file instead of the whole dataset
		use seat_initRI`j' session_id `needvar' using `temp', clear

		// Shift each occupant's seat number by dn, so that merging on the seat code
		// delivers the occupant of the adjacent seat
		gen num = substr(seat_initRI`j',3,.)
		destring num, replace
		replace num = num + `dn'
		tostring num, replace
		replace seat_initRI`j' = substr(seat_initRI`j',1,2)+num
		foreach var of varlist `needvar' {
			ren `var' `d'_`var'RI`j'
		}
		drop num
		save `temp`d'', replace
	}
	
	// merge neighbor info
	use `temp', clear
	merge 1:1 session_id seat_initRI`j' using `tempU', nogen keep(1 3)
	merge 1:1 session_id seat_initRI`j' using `tempL', nogen keep(1 3)
	
	
	// number of neighbors
	gen numNeighborRI`j' = !mi(U_MP_idRI`j')+!mi(L_MP_idRI`j')
	
	// overwrite the running copy of the main dataset (same file every iteration)
	save `temp', replace
}


**************************
/* Get date of birth and merge in */
**************************
// Build an MP-level file with date of birth, then merge it onto the seating data
preserve
	import delimited using "`MPfolder'\\mp_dob.csv", clear encoding("utf-8")
	rename (v1 v2) (MP_id dob)

	// rows without a usable date of birth
	drop if inlist(dob,"empty","noMatchng")

	// year = last four characters; day = text before the first "."; month = what is left of the
	// second piece once digits and parentheses are removed
	gen birthyear = substr(dob,-4,4)
	split dob, parse(.)
	rename dob1 birthday
	// NOTE(review): -strip- is not an official Stata command; presumably the user-written SSC package -- confirm it is installed
	strip dob2, of("1234567890()") g(birthmonth)
	drop dob2
	replace birthmonth = trim(birthmonth)

	// Icelandic month names -> month number (stored as text here, converted to numeric just below)
	local icemonth `""janúar" "febrúar" "mars" "apríl" "maí" "júní" "júlí" "ágúst" "september" "október" "nóvember" "desember""'
	forvalues m = 1/12 {
		local mname : word `m' of `icemonth'
		replace birthmonth = "`m'" if birthmonth == "`mname'"
	}

	destring birthyear birthday birthmonth, replace
	gen birthdate = mdy(birthmonth,birthday,birthyear)
	drop dob

	// every birth* variable must be filled in
	foreach bvar of varlist birth* {
		assert `bvar' != .
	}

	isid MP_id

	save `birthday'
restore

// merge on birthday: every MP in the seating data must have a date of birth (assert(2 3))
merge m:1 MP_id using `birthday', assert(2 3) keep(3) nogen

// Value labels for sessions, from https://www.althingi.is/thingmenn/althingismannatal/thingsetutimi-numer-loggjafarthinga/
label define sessions 115 "1991-92" 116 "1992-93" 117 "1993-94" 118 "1994-95" 119 "1995" 120 "1995-96" 121 "1996-97" ///
	122 "1997-98" 123 "1998-99" 124 "1999" 125 "1999-2000" 126 "2000-01" 127 "2001-02" 128 "2002-03" 129 "2003" ///
	130 "2003-04" 131 "2004-05" 132 "2005-06" 133 "2006-07" 134 "2007" 135 "2007-08" 136 "2008-09" 137 "2009" ///
	138 "2009-10" 139 "2010-11" 140 "2011-12" 141 "2012-13" 142 "2013" 143 "2013-14" 144 "2014-15" 145 "2015-16" ///
	146 "2016-17" 147 "2017" 148 "2017-18" 149 "2018-19"
label values session_id sessions

// Session start dates (same site). The list is ordered by session: entry 1 is session 115, entry 35 is session 149.
gen sessStartDate = .
local sdates `""mdy(10,1,1991)" "mdy(8,17,1992)" "mdy(10,1,1993)" "mdy(10,1,1994)" "mdy(5,16,1995)" "mdy(10,2,1995)" "mdy(10,1,1996)" "'
local sdates `"`sdates' "mdy(10,1,1997)" "mdy(10,1,1998)" "mdy(6,8,1999)" "mdy(10,1,1999)" "mdy(10,2,2000)" "mdy(10,1,2001)" "mdy(10,1,2002)""'
local sdates `"`sdates' "mdy(5,26,2003)" "mdy(10,1,2003)" "mdy(10,1,2004)" "mdy(10,1,2005)" "mdy(10,2,2006)" "mdy(5,31,2007)" "mdy(10,1,2007)""'
local sdates `"`sdates' "mdy(10,1,2008)" "mdy(5,15,2009)" "mdy(10,1,2009)" "mdy(10,1,2010)" "mdy(10,1,2011)" "mdy(9,11,2012)" "mdy(6,6,2013)""'
local sdates `"`sdates' "mdy(10,1,2013)" "mdy(9,9,2014)" "mdy(9,8,2015)" "mdy(12,6,2016)" "mdy(9,12,2017)" "mdy(12,14,2017)" "mdy(9,11,2018)""'
forvalues sess = 115/149 {
	local pos = `sess' - 114
	local startexpr : word `pos' of `sdates'
	replace sessStartDate = `startexpr' if session_id == `sess'
}
assert sessStartDate != .

// Age at the start of the session, in years
gen age = (sessStartDate - birthdate)/365.25
label variable age "age in years as of session start = (sessStartDate-birthdate)/365.25"

// Male indicator
gen male = (gender == "M")
label variable male "=1 if male, =0 if female"

// Merge on the MP-session careers data; every seating row must find a match (assert(2 3))
merge 1:1 MP_id session_id using "`MPintfolder'/MP_session_careers", assert(2 3) keep(3) nogen

// Completeness checks: stop if any of these variables has a missing value
foreach numvar of varlist const_sdate reykjavik southern government_party constOrder firstSessionMember sessExper {
	assert `numvar' != .
}
foreach strvar of varlist constituency_full constituency constName {
	assert `strvar' != ""
}
foreach evervar of varlist ever* {
	assert `evervar' != .
}

// Park the MP-session dataset while the expenses data is prepared
tempfile seating_MP_tmp
save `seating_MP_tmp', replace


**************************
/* Prep expenses data */
**************************
// Salaries and expenses file, semicolon-delimited
// NOTE(review): the Icelandic names below contain "đ"/"ć" where "ð"/"æ" would be expected, and this import
// (unlike the others in this file) sets no encoding() -- presumably the strings match the file as Stata
// reads it; confirm before changing either side.
import delimited using "`MPfolder'/mp_salaries_expenses.csv", clear delim(";")

// Translate variable names to English
// (category1/category2: google translation of yfirflokkun/flokkun = overclassification/classification)
rename (ár mánuđur nafn yfirflokkun flokkun upphćđ) (year month MP_name category1 category2 amount)

// No field may be empty
assert year!=. & month!=. & MP_name!="" & category1!="" & category2!="" & amount!=.

// Number the top-level categories 1..6 in the order listed (English labels are attached later)
gen cat1 = .
local k = 0
foreach catname in "Fastar greiđslur" "Ferđakostnađur innan lands" "Ferđakostnađur utan lands" "Launagreiđslur" "Starfskostnađur" "Síma- og netkostnađur" {
	local ++k
	replace cat1 = `k' if category1 == "`catname'"
}
assert cat1 != .

/* don't use this in the end
g category2_eng = category2
replace category2_eng="Other travel expenses abroad" if category2=="Annar ferðakostnaður utan lands"
replace category2_eng="Other (tunnels, taxis, etc.)" if category2=="Annað (jarðgöng, leigubílar o.fl.)"
replace category2_eng="Other wage payments" if category2=="Aðrar launagreiðslur"
replace category2_eng="Remuneration" if category2=="Biðlaun"
replace category2_eng="Per diem" if category2=="Dagpeningar"
replace category2_eng="Fuel" if category2=="Eldsneyti"
replace category2_eng="Reimbursed operating expenses" if category2=="Endurgreiddur starfskostnaður"
replace category2_eng="Fixed travel costs in the constituency" if category2=="Fastur ferðakostnaður í kjördæmi"
replace category2_eng="Fixed operating costs" if category2=="Fastur starfskostnaður"
replace category2_eng="Travel by rental car" if category2=="Ferðir með bílaleigubíl"
replace category2_eng="Trips in your own car" if category2=="Ferðir á eigin bifreið"
replace category2_eng="Domestic flights and fares" if category2=="Flugferðir og fjargjöld innan lands"
replace category2_eng="Flights abroad" if category2=="Flugferðir utan lands"
replace category2_eng="Accommodation and food costs within the country" if category2=="Gisti- og fæðiskostnaður innan lands"
replace category2_eng="Accommodation and food costs abroad" if category2=="Gisti- og fæðiskostnaður utan lands"
replace category2_eng="Housing and subsistence expenses" if category2=="Húsnæðis- og dvalarkostnaðargreiðsla"
replace category2_eng="Salary (parliamentary purchase)" if category2=="Laun (þingfararkaup)"
replace category2_eng="Telephone and network costs" if category2=="Síma- og netkostnaður"
replace category2_eng="Telephone support" if category2=="Símastyrkur"
replace category2_eng="Load on parliamentary purchases" if category2=="Álag á þingfararkaup"
assert category2_eng!=""
*/

// aggregate to year-cat1-level
// collapse (sum) stores the totals as double. The earlier egen total() + duplicates drop route stored
// them as float (Stata's default type), which cannot hold integers above 16,777,216 exactly -- a range
// that yearly totals in krónur can reach.
collapse (sum) amountsum = amount, by(MP_name year cat1)

// reshape so one variable per category of expenses
reshape wide amountsum, i(year MP_name) j(cat1)

// an MP-year with no payments in a category gets 0 rather than missing
forv i=1/6 {
	replace amountsum`i'=0 if amountsum`i'==.
}
ren amountsum1 fixedPay
la var fixedPay "Fixed payments (by MP-year)"
ren amountsum2 travelExpIceland
la var travelExpIceland "Travel expenses within Iceland (by MP-year)"
ren amountsum3 travelExpAbroad
la var travelExpAbroad "Travel expenses abroad (by MP-year)"
ren amountsum4 wages
la var wages "Wages (by MP-year)"
ren amountsum5 operatingCost
la var operatingCost "Operating costs (by MP-year)"
ren amountsum6 telNetCost
la var telNetCost "Telephone and network costs (by MP-year)"

// recreate three categories shown in table on website here: https://www.althingi.is/altext/cv/is/laun_og_greidslur/?ar=2007&manudur=4
// already have wages and fixedPay, just need to sum remaining expenses (kept in double precision as well)
g double expenses = travelExpIceland+travelExpAbroad+operatingCost+telNetCost

// simplify by just keeping main categories
keep MP_name year wages expenses fixedPay

// quick checks
foreach var of varlist wages expenses fixedPay {
	assert `var'!=. & `var'>=0
}

// temp save (isid guarantees one row per MP-year, so no duplicates can remain to drop)
isid MP_name year
save `expenses'


**************************
/* Merge expenses data */
**************************
use `seating_MP_tmp', clear

// get first calendar year prior to current session:
// the session's value label starts with its four-digit start year (e.g. "1991-92"), so year = start year - 1.
// The helper variable is referenced by its full name; referring to it as "sessyear" only works while
// variable abbreviation is switched on and no other variable shares that prefix.
decode session_id, g(sessyear_tmp)
g year = real(substr(sessyear_tmp,1,4)) - 1
drop sessyear_tmp

// now merge in expenses data (MP-sessions without an expenses record are kept)
merge m:1 MP_name year using `expenses', keep(1 3) nogen

// from session 136 on, an MP with no expenses record is given 0; earlier sessions stay missing
foreach var of varlist fixedPay wages expenses {
	replace `var'=0 if `var'==. & session_id>=136
}
drop year
la var fixedPay "Althingi fixed payment for calendar year immediately prior to this session"
la var wages "Althingi wages for calendar year immediately prior to this session"
la var expenses "Althingi expenses for calendar year immediately prior to this session"

// save
save "`seatingintfolder'/seating_MP", replace

**************************
/* Now add front and back neighbours */
**************************

// Lookup table: for each seat code, the code of the seat in front and of the seat behind
insheet using "`seatingfolder'\front_back_seat.csv", comma clear names
tempfile frontback
save `frontback'


// Attach the front/back seat codes to every MP-session row, once per run (run 0 = actual seating).
// The lookup is keyed on seat_init, so the run's seat variable borrows that name for the merge.
use "`seatingintfolder'/seating_MP", clear

forvalues run = 0/`runsRI' {
	rename seat_initRI`run' seat_init
	// every seat in the data must exist in the lookup; lookup seats nobody occupies are discarded
	merge m:1 seat_init using `frontback', assert(match using) keep(match) nogenerate
	rename (seat_init front_seat back_seat) (seat_initRI`run' F_seat_initRI`run' B_seat_initRI`run')
}

// From here on the frontback tempfile holds the full MP-session dataset with the front/back seat codes
save `frontback', replace


// save neighbour details to merge
// For each run i, write a small lookup file fb`i' holding (session, seat code, occupant details) with
// every variable prefixed N_, ready to be matched against the front/back seat codes.
// Only the seven needed columns are read from the frontback tempfile, which at this point holds the
// dataset currently in memory; loading the whole (very wide) dataset on each pass is unnecessary.
local needvar "MP_id gender party_id coalition strata"
forv i=0/`runsRI' {
	use session_id seat_initRI`i' `needvar' using `frontback', clear
	ren seat_initRI`i' N_seat_initRI`i'
	
	foreach x in `needvar' {
		ren `x' N_`x'
	}
	
	tempfile fb`i'
	save `fb`i''
}

// bring the full dataset back into memory for the merges that follow
use `frontback', clear


// Attach the details of the MP sitting in front (F) and behind (B), for every run.
// The front/back seat code is temporarily renamed to the lookup key (N_seat_initRI#), matched against
// that run's lookup file, and the N_ variables that come in are renamed to F_*RI# or B_*RI#.
forvalues run = 0/`runsRI' {
	foreach side in F B {
		rename `side'_seat_initRI`run' N_seat_initRI`run'
		// keep every MP-session row; lookup rows that are nobody's front/back seat are discarded
		merge m:1 session_id N_seat_initRI`run' using `fb`run'', keep(master match) nogenerate

		foreach v of local needvar {
			rename N_`v' `side'_`v'RI`run'
		}
		rename N_seat_initRI`run' `side'_seat_initRI`run'
	}
}

// the front/back seat codes themselves are no longer needed
drop F*seat_initRI* B*seat_initRI* 


**************************
/* Now add party left-right score */
**************************

// Extract the Icelandic parties from the ParlGov workbook into a small lookup file
tempfile partyscores
preserve
	import excel using "$path_pch/Data/raw/Parties/parlgov.xlsx", clear first
	drop if country_name != "Iceland"

	keep party_name_short party_name_english party_name party_id left_right
	rename party_id parlgov_id

	save `partyscores'
restore

// generate party ID to link with ParlGov left-right scores
// Each pair is "<party_id in this data> <ParlGov party id>"; party_ids not listed keep a missing parlgov_id.
gen parlgov_id = .
foreach link in "4 1342" "5 147" "3 1455" "2 1360" "1 228" "8 228" "7 205" "12 210" "10 210" "9 1006" "11 506" "13 587" "14 587" "19 2205" "15 2204" "16 2633" "17 2632" "18 2655" {
	gettoken pid pgid : link
	replace parlgov_id = `pgid' if party_id == `pid'
}

// bring in party names and left-right score; rows without a ParlGov match are kept
merge m:1 parlgov_id using `partyscores', keep(1 3) nogen

/* 
no matching party ID for party_id==6 (those with no party)

special cases below to make sure everyone with a party has a score:
 - code Parliamentary Party of the Social Democrats as Social Democratic Party
 - code Parliamentary Party of Independents as Left-Green Movement
 - Civic Movement -- The Movement coded before and after some name change

one party with ID from parlgov but no left-right score: Flokkur fólksins, party_id==17, People's Party

check matches make sense by looking up politicians, party_id from 1 to 19:

1 -- Social Democratic Party (but Icelandic says "People's Party", confirmed by wiki, but confusing because there is a new People's Party since 2016)
2 -- People's Alliance
3 -- Progressive Party
4 -- Independence Party
5 -- Women's List / Women's Alliance
6 -- No Party / Independent I guess (only 8 MP-session observations, no parl gov match obviously)
7 -- People's Movement (https://en.wikipedia.org/wiki/National_Awakening_(Iceland)#:~:text=National%20Awakening%20%E2%80%93%20People's%20Movement%20(Icelandic,future%20Prime%20Minister%20of%20Iceland.)
8 -- Parliamentary Party of the Social Democrats (https://www.althingi.is/thingmenn/thingflokkar/thingflokkur-jafnadarmanna/) -- Social Democratic Party and National Awakening formed a unified parliamentary group, just code as Social Democratic Party (see https://en.wikipedia.org/wiki/1995_Icelandic_parliamentary_election and https://en.wikipedia.org/wiki/National_Awakening_(Iceland))
9 -- Social Democratic Alliance
10 -- Parliamentary Party of Independents, code as Left-Green since 3 of 4 MPs joined Left-Green immediately after (https://www.althingi.is/thingmenn/thingflokkar/thingflokkur-ohadra/)
11 -- Liberal Party
12 -- Left-Green Movement
13 -- Civic Movement -- The Movement
14 -- Civic Movement -- The Movement (some name changes but with similar MPs, so this comes up twice)
15 -- Bright Future
16 -- Revival
17 -- People's Party
18 -- Centre Party
19 -- Pirate Party
*/

// Variable labels for the ParlGov fields
label variable parlgov_id "Party ID from parlgov data"
label variable party_name_short "Short party name from parlgov data"
label variable party_name_english "English party name from parlgov data"
label variable party_name "Icelandic party name from parlgov data"
label variable left_right "Parlgov party score 0-10: time-invariant unweighted mean info from party expert surveys"


// Final output: one row per MP-session, ordered by MP and session
sort MP_id session_id, stable
save "`seatingintfolder'\seating_MP_frontback.dta", replace

// audible signal that the do-file has finished
beep
